Seahorse data analysis


1 Preparation

1.1 Required packages

library(tidyverse)
library(ggsignif)
library(broom)
library(ComplexHeatmap)
library(RColorBrewer)
library(heatmaply)
library(factoextra)
library(plotly)
library(GGally)
# devtools::install_github('vqv/ggbiplot')
library(ggbiplot)
library(corrplot)

1.2 Functions

  • The functions are stored in a separate script
  • The following functions were written:
  • Timeline plot
  • Density plot
  • Barplot
source(here::here("Functions", "Functions.R"))

GGscatterPlot <- function(data, mapping, ..., method = "spearman") {
    
    # Get correlation coefficient
    x <- GGally::eval_data_col(data, mapping$x)
    y <- GGally::eval_data_col(data, mapping$y)
    
    cor <- cor(x, y, method = method)
    # Assemble data frame
    df <- data.frame(x = x, y = y)
    # PCA
    nonNull <- x != 0 & y != 0
    dfpc <- prcomp(~x + y, df[nonNull, ])
    df$cols <- predict(dfpc, df)[, 1]
    # Define the direction of color range based on PC1 orientation:
    dfsum <- x + y
    colDirection <- ifelse(dfsum[which.max(df$cols)] < dfsum[which.min(df$cols)], 
        1, -1)
    # Get 2D density for alpha
    dens2D <- MASS::kde2d(df$x, df$y)
    df$density <- fields::interp.surface(dens2D, df[, c("x", "y")])
    
    if (any(df$density == 0)) {
        mini2D = min(df$density[df$density != 0])  #smallest non zero value
        df$density[df$density == 0] <- mini2D
    }
    # Prepare plot
    pp <- ggplot(df, aes(x = x, y = y, color = cols, alpha = 1/density)) + ggplot2::geom_point(shape = 16, 
        show.legend = FALSE) + ggplot2::scale_color_viridis_c(direction = colDirection) + 
        # scale_color_gradient(low = '#0091ff', high = '#f0650e') +
    ggplot2::scale_alpha(range = c(0.05, 0.6)) + ggplot2::geom_abline(intercept = 0, 
        slope = 1, col = "darkred") + ggplot2::geom_label(data = data.frame(xlabel = min(x, 
        na.rm = TRUE), ylabel = max(y, na.rm = TRUE), lab = round(cor, digits = 3)), 
        mapping = ggplot2::aes(x = xlabel, y = ylabel, label = lab), hjust = 0, vjust = 1, 
        size = 3, fontface = "bold", inherit.aes = FALSE  # do not inherit anything from the ...
) + 
        theme_minimal()
    
    return(pp)
}

2 Data preparation

2.1 Load and combine data

  • Data from Gabriel (Lifespan_data)
  • Data from Seahorse analysis
  • Merge (be careful that you merge the correct samples!)
  • PPR measurements removed (they were used to calculate ATP production)
## Load Gabriels data table
data_Gab <- read_csv(here::here("Data", "Lifespan_Study_Data_updated.csv")) %>% filter(Treatment %in% 
    c("Control_21", "Control_3")) %>% filter(Cell_Line %in% c("hFB11", "hFB12", "hFB13")) %>% 
    filter(Date %in% c("03/07/2019", "03/21/2019", "04/04/2019", "04/18/2019")) %>% 
    mutate(Date_of_Experiment = case_when(Date == "03/07/2019" ~ "2019.03.08", Date == 
        "03/21/2019" ~ "2019.03.22", Date == "04/04/2019" ~ "2019.04.05", Date == 
        "04/18/2019" ~ "2019.04.19")) %>% dplyr::rename(Treatment_Oxygen = Treatment) %>% 
    mutate(Treatment = case_when(Treatments == "Control" ~ "Ctrl")) %>% select(-Treatments) %>% 
    droplevels()

## Remove columns that only contain NAs
data_Gab <- data_Gab %>% select_if(~!sum(is.na(.)) >= nrow(data_Gab))

## Load the Seahorse data
data_Seahorse <- list.files(here::here("Data"), pattern = "*Final.csv", full.names = T) %>% 
    lapply(read_csv) %>% bind_rows %>% separate(X1, into = c("Cell_Line", "Percent_Oxygen", 
    "Treatment")) %>% select(-contains("PPR")) %>% filter(Treatment %in% "Ctrl") %>% 
    droplevels()
data_Seahorse$Percent_Oxygen <- as.numeric(data_Seahorse$Percent_Oxygen)

## Finally combine both datasets
data_combined <- full_join(data_Gab, data_Seahorse, by = c("Cell_Line", "Date_of_Experiment", 
    "Percent_Oxygen", "Treatment")) %>% unite(Group, Cell_Line, Percent_Oxygen, remove = FALSE)

## Remove the percent sign from Telomere_Length_CV
data_combined$Telomere_Length_CV <- as.numeric(str_remove(data_combined$Telomere_Length_CV, 
    "%"))
data_combined$Cell_Line <- as.factor(data_combined$Cell_Line)
data_combined$Percent_Oxygen <- as.factor(data_combined$Percent_Oxygen)

2.2 Check notes

  • Gabriel has a column called “notes”
  • Here we retrieve further information for the subset of samples we’re using
  • From the notes column we see that hFB11 at 62 days grown was infected
  • Will be excluded
test <- data_combined %>% select(Cell_Line, Percent_Oxygen, Days_Grown, Passage, 
    Notes)
test[which(!test$Notes == "NA"), ]
# A tibble: 1 x 5
  Cell_Line Percent_Oxygen Days_Grown Passage Notes   
  <fct>     <fct>               <dbl>   <dbl> <chr>   
1 hFB11     21                     62      15 Infected
data_combined <- data_combined %>% filter(!c(Cell_Line == "hFB11" & Passage == "15")) %>% 
    select(-Notes)

2.3 Remove empty columns without data

  • Also remove RNA seq info
## Remove columns that only contain NAs
data_combined <- data_combined %>% select_if(~!sum(is.na(.)) >= nrow(data_combined) - 
    1) %>% select(-contains("RNAseq"))

2.4 Choose independent and dependent variables

InDependent_vars <- c("Sample", "Cell_Line_Group", "Unique_Variable_Name", "Group", 
    "Cell_Line", "Cell_Type", "Sex", "Clinical_Condition", "Study_Part", "Replicate_Line", 
    "Treatment", "Treatment_Oxygen", "Percent_Oxygen", "Date", "Time", "Date_1", 
    "pre_study_doublings", "Cells_Plated", "Cells_Counted", "Days_Per_Passage", "basename", 
    "Date_of_Experiment", "Experiment_Name", "mitotic_age_per_day", "mitotic_age_per_doubling", 
    "Donor_Age", "Donor_Age_with_gestation", "Passage", "Days_Grown", "Intial_Mitotic_Age", 
    "pre_study_days_grown", "pre_study_doublings")

Dependent_vars <- colnames(data_combined)[!colnames(data_combined) %in% c(InDependent_vars)]
dta <- qpcR:::cbind.na(InDependent_vars, Dependent_vars)
dta[is.na(dta)] <- " "
knitr::kable(dta, caption = "")
Table 2.1:
InDependent_vars Dependent_vars
Sample Divisions_Per_Passage
Cell_Line_Group Population_Doublings
Unique_Variable_Name MiAge_Population_Doublings
Group MiAge_Days_Grown
Cell_Line Population_Doubling_Time
Cell_Type Doubling_Rate
Sex Cell_Size
Clinical_Condition Cell_Volume
Study_Part Percent_Dead
Replicate_Line Telomere_Length
Treatment Telomere_Length_CV
Treatment_Oxygen IL6
Percent_Oxygen IL6_Normalized
Date Copy_Number
Time cf_mtDNA
Date_1 cf_nDNA
pre_study_doublings PanTissue_Clock
Cells_Plated PanTissue_Normalized
Cells_Counted Hannum_Clock
Days_Per_Passage Skin_Blood_Clock
basename PhenoAge_Clock
Date_of_Experiment GrimAge_Clock
Experiment_Name GrimAge_Normalized
mitotic_age_per_day DNAmTL
mitotic_age_per_doubling Mitotic_Age
Donor_Age DunedinPoAm_Age
Donor_Age_with_gestation Baseline_Respiration
Passage NonMitochondrial_Respiration
Days_Grown Basal_Respiration
Intial_Mitotic_Age Max_Respiration
pre_study_days_grown Spare_Respiration
pre_study_doublings Proton_Leak
ATPlinked_Respiration
Coupling_Efficiency
Baseline_ECAR
Max_ECAR
Spare_ECAR
ATPtotal
ATPglyc
ATPox
ATPtotal_max
ATPglyc_max
ATPox_max
ATPtotal_spare
ATPox_spare
ATPglyc_spare
Resting_Metabolic_Rate
Max_Metabolic_Rate
PicoWatts
Max_PicoWatts
OCRcoupled

2.5 Choose subgroups

  • TBD
  • I would like to divide the dataset into subgroups and compare for instance Mito vars with Age and time vars
Age_time_vars <- c("MiAge_Population_Doublings", "MiAge_Days_Grown", "Telomere_Length", 
    "Telomere_Length_CV", "PanTissue_Clock", "PanTissue_Normalized", "Hannum_Clock", 
    "Skin_Blood_Clock", "PhenoAge_Clock", "GrimAge_Clock", "GrimAge_Normalized", 
    "DNAmTL", "Mitotic_Age", "DunedinPoAm_Age")
Seahorse_vars <- c("Baseline_Respiration", "NonMitochondrial_Respiration", "Basal_Respiration", 
    "Max_Respiration", "Spare_Respiration", "Proton_Leak", "ATPlinked_Respiration", 
    "Coupling_Efficiency", "Baseline_ECAR", "Max_ECAR", "Spare_ECAR", "ATPtotal", 
    "ATPglyc", "ATPox", "ATPtotal_max", "ATPglyc_max", "ATPox_max", "ATPtotal_spare", 
    "ATPox_spare", "ATPglyc_spare", "Resting_Metabolic_Rate", "Max_Metabolic_Rate", 
    "PicoWatts", "Max_PicoWatts", "OCRcoupled")
Glyc_vars <- c("NonMitochondrial_Respiration", "Baseline_ECAR", "Max_ECAR", "Spare_ECAR", 
    "ATPglyc", "ATPtotal_max", "ATPglyc_max", "ATPglyc_spare", "ATPtotal_max", "ATPglyc_max", 
    "ATPox_max", "ATPtotal_spare", "ATPox_spare", "ATPglyc_spare")
Mito_vars <- c("OCRcoupled", "ATPox_max", "ATPox_spare", "ATPox", "Proton_Leak", 
    "Basal_Respiration", "Max_Respiration", "Spare_Respiration", "ATPlinked_Respiration", 
    "Coupling_Efficiency", "Baseline_Respiration", "Copy_Number", "cf_mtDNA", "cf_nDNA")
ATP_vars <- c("ATPlinked_Respiration", "ATPtotal", "ATPglyc", "ATPox")
Doubling_vars <- c("Divisions_Per_Passage", "Population_Doublings", "Population_Doubling_Time", 
    "Doubling_Rate")
Cell_vars <- c("Cell_Size", "Cell_Volume", "Percent_Dead", "Divisions_Per_Passage", 
    "Population_Doublings", "Population_Doubling_Time", "Doubling_Rate")
Immune_vars <- c("IL6", "IL6_Normalized", "cf_mtDNA", "cf_nDNA")

dta <- qpcR:::cbind.na(Age_time_vars, Seahorse_vars, Glyc_vars, Mito_vars, ATP_vars, 
    Doubling_vars, Cell_vars, Immune_vars)
dta[is.na(dta)] <- " "
knitr::kable(dta, caption = "")
Table 2.2:
Age_time_vars Seahorse_vars Glyc_vars Mito_vars ATP_vars Doubling_vars Cell_vars Immune_vars
MiAge_Population_Doublings Baseline_Respiration NonMitochondrial_Respiration OCRcoupled ATPlinked_Respiration Divisions_Per_Passage Cell_Size IL6
MiAge_Days_Grown NonMitochondrial_Respiration Baseline_ECAR ATPox_max ATPtotal Population_Doublings Cell_Volume IL6_Normalized
Telomere_Length Basal_Respiration Max_ECAR ATPox_spare ATPglyc Population_Doubling_Time Percent_Dead cf_mtDNA
Telomere_Length_CV Max_Respiration Spare_ECAR ATPox ATPox Doubling_Rate Divisions_Per_Passage cf_nDNA
PanTissue_Clock Spare_Respiration ATPglyc Proton_Leak Population_Doublings
PanTissue_Normalized Proton_Leak ATPtotal_max Basal_Respiration Population_Doubling_Time
Hannum_Clock ATPlinked_Respiration ATPglyc_max Max_Respiration Doubling_Rate
Skin_Blood_Clock Coupling_Efficiency ATPglyc_spare Spare_Respiration
PhenoAge_Clock Baseline_ECAR ATPtotal_max ATPlinked_Respiration
GrimAge_Clock Max_ECAR ATPglyc_max Coupling_Efficiency
GrimAge_Normalized Spare_ECAR ATPox_max Baseline_Respiration
DNAmTL ATPtotal ATPtotal_spare Copy_Number
Mitotic_Age ATPglyc ATPox_spare cf_mtDNA
DunedinPoAm_Age ATPox ATPglyc_spare cf_nDNA
ATPtotal_max
ATPglyc_max
ATPox_max
ATPtotal_spare
ATPox_spare
ATPglyc_spare
Resting_Metabolic_Rate
Max_Metabolic_Rate
PicoWatts
Max_PicoWatts
OCRcoupled

3 Data normalization

3.1 Zscore transform

\[ Z = \frac{x-\mu}\sigma \] \[Z = standard\ score; \ x = observed\ value; \ \mu = mean\ of\ the\ sample; \ \sigma = standard\ deviation\ of\ the\ sample\] - The result should be a dataframe called “data_zscore”

3.2 Baseline normalization/Fold change

  • At each time point, for each cell line, set control (21% oxygen) to 100%
data_long <- data_combined %>% pivot_longer(cols = paste(Dependent_vars), names_to = "feature", 
    values_to = "value")

data_FC <- data_long %>% filter(Percent_Oxygen %in% "21") %>% dplyr::rename(baseline = value) %>% 
    select(-c("Sample", "Cell_Line_Group", "Unique_Variable_Name", "Group", "Cell_Type", 
        "Sex", "Clinical_Condition", "Donor_Age", "Donor_Age_with_gestation", "Study_Part", 
        "Replicate_Line", "Percent_Oxygen", "Date", "Time", "Date_1", "Treatment_Oxygen", 
        "mitotic_age_per_day", "mitotic_age_per_doubling", "Intial_Mitotic_Age", 
        "pre_study_days_grown", "pre_study_doublings", "Cells_Plated", "Cells_Counted", 
        "Days_Per_Passage", "basename", "Experiment_Name")) %>% full_join(data_long, 
    by = c("Cell_Line", "Days_Grown", "Treatment", "Passage", "Date_of_Experiment", 
        "feature")) %>% mutate(Normalized = (value/baseline) * 100) %>% select(-c(baseline, 
    value)) %>% pivot_wider(names_from = "feature", values_from = "Normalized")
data_FC <- na_if(data_FC, Inf)
data_FC <- na_if(data_FC, -Inf)

3.3 Log2 Fold change

  • Note: Don’t normalize based on days grown, for some experiments this factor differs by one day. Better to base the normalization on date of experiment and passage
  • The result should be a dataframe called “data_logFC”
data_long <- data_combined %>% pivot_longer(cols = paste(Dependent_vars), names_to = "feature", 
    values_to = "value")  #%>%
# filter(Cell_Line %in% 'hFB12') %>% filter(Date_of_Experiment %in% '2019.04.19'
# ) %>% filter(feature %in% 'Telomere_Length')

data_logFC <- data_long %>% filter(Percent_Oxygen %in% "21") %>% dplyr::rename(baseline = value) %>% 
    select(-c("Sample", "Cell_Line_Group", "Unique_Variable_Name", "Group", "Cell_Type", 
        "Sex", "Clinical_Condition", "Donor_Age", "Donor_Age_with_gestation", "Study_Part", 
        "Replicate_Line", "Treatment_Oxygen", "Date", "Time", "Percent_Oxygen", "Date_1", 
        "mitotic_age_per_day", "mitotic_age_per_doubling", "Intial_Mitotic_Age", 
        "pre_study_days_grown", "pre_study_doublings", "Cells_Plated", "Cells_Counted", 
        "Days_Per_Passage", "basename", "Experiment_Name", "Days_Grown")) %>% full_join(data_long, 
    by = c("Cell_Line", "Treatment", "Passage", "Date_of_Experiment", "feature")) %>% 
    mutate(Normalized = log2(value/baseline)) %>% select(-c(baseline, value)) %>% 
    pivot_wider(names_from = "feature", values_from = "Normalized")
data_logFC <- na_if(data_logFC, Inf)
data_logFC <- na_if(data_logFC, -Inf)

4 Timeline

4.1 Raw data

Var_choice <- all_of(Dependent_vars)
data_combined %>% timeline_plot()

4.2 Normalized data (Fold change to 21% oxygen)

data_FC %>% timeline_plot()


5 Multivariate data visualization

5.1 Split data into hypoxia and normoxia data

  • Only numeric variables
  • This is used for heatmaps, correlation plots…
  • A list of numerical variables is shown in results
nums <- unlist(lapply(data_combined, is.numeric))
nums_vars <- colnames(data_combined[, nums])
nums_vars <- nums_vars[!nums_vars %in% c("Sample", "Study_Part")]
print(nums_vars)
 [1] "Donor_Age"                    "Donor_Age_with_gestation"    
 [3] "Replicate_Line"               "Passage"                     
 [5] "Days_Grown"                   "mitotic_age_per_day"         
 [7] "mitotic_age_per_doubling"     "Intial_Mitotic_Age"          
 [9] "pre_study_days_grown"         "pre_study_doublings"         
[11] "Cells_Plated"                 "Cells_Counted"               
[13] "Days_Per_Passage"             "Divisions_Per_Passage"       
[15] "Population_Doublings"         "MiAge_Population_Doublings"  
[17] "MiAge_Days_Grown"             "Population_Doubling_Time"    
[19] "Doubling_Rate"                "Cell_Size"                   
[21] "Cell_Volume"                  "Percent_Dead"                
[23] "Telomere_Length"              "Telomere_Length_CV"          
[25] "IL6"                          "IL6_Normalized"              
[27] "Copy_Number"                  "cf_mtDNA"                    
[29] "cf_nDNA"                      "PanTissue_Clock"             
[31] "PanTissue_Normalized"         "Hannum_Clock"                
[33] "Skin_Blood_Clock"             "PhenoAge_Clock"              
[35] "GrimAge_Clock"                "GrimAge_Normalized"          
[37] "DNAmTL"                       "Mitotic_Age"                 
[39] "DunedinPoAm_Age"              "Baseline_Respiration"        
[41] "NonMitochondrial_Respiration" "Basal_Respiration"           
[43] "Max_Respiration"              "Spare_Respiration"           
[45] "Proton_Leak"                  "ATPlinked_Respiration"       
[47] "Coupling_Efficiency"          "Baseline_ECAR"               
[49] "Max_ECAR"                     "Spare_ECAR"                  
[51] "ATPtotal"                     "ATPglyc"                     
[53] "ATPox"                        "ATPtotal_max"                
[55] "ATPglyc_max"                  "ATPox_max"                   
[57] "ATPtotal_spare"               "ATPox_spare"                 
[59] "ATPglyc_spare"                "Resting_Metabolic_Rate"      
[61] "Max_Metabolic_Rate"           "PicoWatts"                   
[63] "Max_PicoWatts"                "OCRcoupled"                  
data_normoxia <- data_combined %>% filter(Percent_Oxygen %in% "21")
numdata_normoxia <- data_normoxia[, c(paste(nums_vars))] %>% select(-c("mitotic_age_per_day", 
    "mitotic_age_per_doubling", "Replicate_Line"))

data_hypoxia <- data_combined %>% filter(Percent_Oxygen %in% "3")
numdata_hypoxia <- data_hypoxia[, c(paste(nums_vars))] %>% select(-c("mitotic_age_per_day", 
    "mitotic_age_per_doubling", "Replicate_Line"))

5.2 Define number of k-means cluster Normoxia data

set.seed(123)

data.scaled <- scale(numdata_normoxia[, Dependent_vars])
# Optimal number of clusters in the data

### Elbow method (look at the knee) Elbow method for kmeans
factoextra::fviz_nbclust(data.scaled, kmeans, method = "wss") + geom_vline(xintercept = 2, 
    linetype = 2) + labs(title = "Elbow method for kmeans Normoxia data")

# Average silhouette for kmeans
factoextra::fviz_nbclust(data.scaled, kmeans, method = "silhouette") + labs(title = "Average silhouette for kmeans Normoxia data")

### Gap statistic
library(cluster)
set.seed(123)
# Compute gap statistic for kmeans
gap_stat <- clusGap(data.scaled, FUN = kmeans, nstart = 25, K.max = 10, B = 500)
print(gap_stat, method = "firstmax")
Clustering Gap statistic ["clusGap"] from call:
clusGap(x = data.scaled, FUNcluster = kmeans, K.max = 10, B = 500,     nstart = 25)
B=500 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
 --> Number of clusters (method 'firstmax'): 1
           logW        E.logW        gap     SE.sim
 [1,] 3.1986802  3.1664514389 -0.0322288 0.07213914
 [2,] 3.0082048  2.8687680735 -0.1394367 0.06897213
 [3,] 2.8147803  2.6388288986 -0.1759514 0.07438674
 [4,] 2.6146435  2.4183011802 -0.1963423 0.08097463
 [5,] 2.4010371  2.1879073332 -0.2131297 0.08846667
 [6,] 2.1750176  1.9350252633 -0.2399924 0.09525075
 [7,] 1.8973493  1.6439618515 -0.2533875 0.10279490
 [8,] 1.5680304  1.2835718921 -0.2844585 0.11646284
 [9,] 1.1030711  0.7970592104 -0.3060118 0.13960774
[10,] 0.2321638 -0.0003292294 -0.2324930 0.19157945
factoextra::fviz_gap_stat(gap_stat) + labs(title = "Gap statistics for kmeans Normoxia data")

# Gap statistic for hierarchical clustering
gap_stat <- clusGap(data.scaled, FUN = hcut, K.max = 10, B = 10)
fviz_gap_stat(gap_stat) + labs(title = "Gap statistics for hclust Normoxia data")

# }

5.3 Define number of k-means cluster Hypoxia data

set.seed(123)

data.scaled <- scale(numdata_hypoxia[, Dependent_vars])
# Optimal number of clusters in the data

### Elbow method (look at the knee) Elbow method for kmeans
factoextra::fviz_nbclust(data.scaled, kmeans, method = "wss") + geom_vline(xintercept = 2, 
    linetype = 2) + labs(title = "Elbow method for kmeans Hypoxia data")

# Average silhouette for kmeans
factoextra::fviz_nbclust(data.scaled, kmeans, method = "silhouette") + labs(title = "Average silhouette for kmeans Hypoxia data")

### Gap statistic
library(cluster)
set.seed(123)
# Compute gap statistic for kmeans
gap_stat <- clusGap(data.scaled, FUN = kmeans, nstart = 25, K.max = 10, B = 500)
print(gap_stat, method = "firstmax")
Clustering Gap statistic ["clusGap"] from call:
clusGap(x = data.scaled, FUNcluster = kmeans, K.max = 10, B = 500,     nstart = 25)
B=500 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
 --> Number of clusters (method 'firstmax'): 1
           logW       E.logW         gap     SE.sim
 [1,] 3.1925812  3.168813639 -0.02376758 0.07745762
 [2,] 2.9604606  2.843781526 -0.11667906 0.06582318
 [3,] 2.7828592  2.617196191 -0.16566302 0.07004801
 [4,] 2.5710597  2.400967019 -0.17009264 0.07707509
 [5,] 2.3383360  2.175045315 -0.16329067 0.08420521
 [6,] 2.0798456  1.926005485 -0.15384007 0.09249840
 [7,] 1.8180286  1.635827822 -0.18220076 0.10072828
 [8,] 1.4788615  1.278847631 -0.20001391 0.11619768
 [9,] 1.0082776  0.793714845 -0.21456280 0.13959036
[10,] 0.1310684 -0.003429018 -0.13449737 0.18814198
factoextra::fviz_gap_stat(gap_stat) + labs(title = "Gap statistics for kmeans Hypoxia data")

# Gap statistic for hierarchical clustering
gap_stat <- clusGap(data.scaled, FUN = hcut, K.max = 10, B = 10)
fviz_gap_stat(gap_stat) + labs(title = "Gap statistics for hclust Hypoxia data")

# }

5.4 Heatmap raw data

Scaled within each feature: - centering is done by subtracting the column means (omitting NAs) of x from their corresponding columns - scaling is done by dividing the (centered) columns of x by their standard deviations

dataHM <- data_combined %>% 
  unite(heatmapgp, Cell_Line, Percent_Oxygen,  Days_Grown, sep="_", remove=F) %>%
  column_to_rownames("heatmapgp")

dataHM <- dataHM[ c("hFB11_21_20", "hFB11_21_34", "hFB11_21_48",
                        "hFB11_3_20", "hFB11_3_34", "hFB11_3_48",
                        "hFB12_21_20", "hFB12_21_34", "hFB12_21_48", "hFB12_21_62", 
                        "hFB12_3_20",  "hFB12_3_34",  "hFB12_3_48",  "hFB12_3_63",
                        "hFB13_21_21", "hFB13_21_35","hFB13_21_49","hFB13_21_63", 
                        "hFB13_3_21",  "hFB13_3_35", "hFB13_3_49",  "hFB13_3_63" 
                        ),]
dataHM$Percent_Oxygen <- as.character(dataHM$Percent_Oxygen)

#meta_data <- dataHM[,colnames(dataHM) %in% meta_names]
exprs <- dataHM[,colnames(dataHM) %in% Dependent_vars]
exprs <- t(scale(exprs))


column_ha <- HeatmapAnnotation(Donor = anno_block(gp = gpar(1:3), labels = c("Donor1", "Donor2", "Donor3")),
                               Days_grown = anno_barplot(as.numeric(dataHM$Days_Grown)),
                               Oxygen = dataHM$Percent_Oxygen,
                               col=list(Oxygen =  c("21"= "steelblue3", "3"= "mistyrose3")))

ht <- Heatmap(exprs,
              col=colorRampPalette(rev(c('#a50026','#d73027','#f46d43','#fdae61','#fee090','white','#e0f3f8','#abd9e9','#74add1','#4575b4','#313695')))(256),
              #col=colorRampPalette(rev(brewer.pal(20, "RdYlBu")))(256),
              top_annotation = column_ha,
              show_column_names=F,
              column_split = factor(dataHM$Cell_Line, levels = unique(dataHM$Cell_Line)),
              row_names_gp = gpar(col = "black",fontsize = 6),
              cluster_columns=FALSE)
    
ht = draw(ht)
Figure: Here is a really important caption.

Figure 5.1: Figure: Here is a really important caption.

5.5 Heatmap with log2FC

dataHM <- data_logFC %>% 
  unite(heatmapgp, Cell_Line, Percent_Oxygen,  Days_Grown, sep="_", remove=F) %>%
  column_to_rownames("heatmapgp") %>%
  filter(Percent_Oxygen %in% "3")

dataHM <- dataHM[ c("hFB11_3_20", "hFB11_3_34", "hFB11_3_48",
                    "hFB12_3_20",  "hFB12_3_34",  "hFB12_3_48",  "hFB12_3_63",
                    "hFB13_3_21",  "hFB13_3_35", "hFB13_3_49",  "hFB13_3_63" 
                        ),]
dataHM$Percent_Oxygen <- as.character(dataHM$Percent_Oxygen)

#meta_data <- dataHM[,colnames(dataHM) %in% meta_names]
exprs <- dataHM[,colnames(dataHM) %in%  Dependent_vars]
exprs <- t(exprs)


column_ha <- HeatmapAnnotation(Donor = anno_block(gp = gpar(1:3), labels = c("Donor1", "Donor2", "Donor3")),
                               Days_grown = anno_barplot(as.numeric(dataHM$Days_Grown))
                               )

ht <- Heatmap(exprs,
              col=colorRampPalette(rev(c('#a50026','#d73027','#f46d43','#fdae61','#fee090','white','#e0f3f8','#abd9e9','#74add1','#4575b4','#313695')))(256),
              #col=colorRampPalette(rev(brewer.pal(20, "RdYlBu")))(256),
              top_annotation = column_ha,
              show_column_names=F,
              column_split = factor(dataHM$Cell_Line, levels = unique(dataHM$Cell_Line)),
              row_names_gp = gpar(col = "black",fontsize = 6),
              cluster_columns=FALSE)
    
ht = draw(ht)

5.6 Higher or lower in hypoxia

Higher_in_hypoxia <- c("cf_nDNA", "ATPglyc", "Baseline_ECAR", "NonMitochondrial_Respiration", 
    "IL6_Normalized", "IL6", "cf_mtDNA")
Lower_in_hypoxia <- c("ATPtotal_spare", "Basal_Respiration", "ATPox", "ATPlinked_Respiration", 
    "OCRcoupled", "Max_Metabolic_Rate", "Max_Respiration", "ATPox_max", "Spare_Respiration", 
    "ATPox_spare", "Copy_Number", "Telomere_Length_CV")

dta <- qpcR:::cbind.na(Higher_in_hypoxia, Lower_in_hypoxia)
dta[is.na(dta)] <- " "
knitr::kable(dta, caption = "")
Table 5.1:
Higher_in_hypoxia Lower_in_hypoxia
cf_nDNA ATPtotal_spare
ATPglyc Basal_Respiration
Baseline_ECAR ATPox
NonMitochondrial_Respiration ATPlinked_Respiration
IL6_Normalized OCRcoupled
IL6 Max_Metabolic_Rate
cf_mtDNA Max_Respiration
ATPox_max
Spare_Respiration
ATPox_spare
Copy_Number
Telomere_Length_CV

5.7 PCA

5.7.1 Days grown grouping

datapca <- data_combined[,c("Percent_Oxygen", "Cell_Line", paste(nums_vars) )] %>%
  mutate(Days = rep("Days")) %>%
  unite(Days_grown, Days_Grown, Days) %>%
  droplevels()
pca_vars <- nums_vars[!nums_vars %in% c("Days_Grown", "Replicate_Line", "mitotic_age_per_day", "mitotic_age_per_doubling", "Intial_Mitotic_Age")]
datapca <- datapca %>% select(Percent_Oxygen, Cell_Line, Days_grown, paste(pca_vars))
pca.obj <- prcomp(datapca[,4:ncol(datapca)], center=TRUE, scale.=TRUE)
P <- ggbiplot(pca.obj,
         obs.scale = 1, 
         var.scale=1,
         ellipse=T,
         circle=F,
         varname.size=3,
         var.axes=T,
         groups=datapca$Days_grown, #no need for coloring, I'm making the points invisible
         alpha=0) #invisible points, I add them below
P$layers <- c(geom_point(aes(color=datapca$Days_grown), cex=3), P$layers) #add geom_point in a layer underneath (only way I have to change the size of the points in ggbiplot)

P

5.7.2 Cell Line grouping

datapca <- data_combined[,c("Percent_Oxygen", "Cell_Line", paste(nums_vars) )] %>%
  mutate(Days = rep("Days")) %>%
  unite(Days_grown, Days_Grown, Days) %>%
  droplevels()
pca_vars <- nums_vars[!nums_vars %in% c("Days_Grown", "Replicate_Line", "mitotic_age_per_day", "mitotic_age_per_doubling", "Intial_Mitotic_Age")]
datapca <- datapca %>% select(Percent_Oxygen, Cell_Line, Days_grown, paste(pca_vars))
pca.obj <- prcomp(datapca[,4:ncol(datapca)], center=TRUE, scale.=TRUE)
P <- ggbiplot(pca.obj,
         obs.scale = 1, 
         var.scale=1,
         ellipse=T,
         circle=F,
         varname.size=3,
         var.axes=T,
         groups=datapca$Cell_Line, #no need for coloring, I'm making the points invisible
         alpha=0) #invisible points, I add them below
P$layers <- c(geom_point(aes(color=datapca$Cell_Line), cex=3), P$layers) #add geom_point in a layer underneath (only way I have to change the size of the points in ggbiplot)

P

5.7.3 Oxygen grouping

datapca <- data_combined[,c("Percent_Oxygen", "Cell_Line", paste(nums_vars) )] %>%
  mutate(Days = rep("Days")) %>%
  unite(Days_grown, Days_Grown, Days) %>%
  droplevels()
pca_vars <- nums_vars[!nums_vars %in% c("Days_Grown", "Replicate_Line", "mitotic_age_per_day", "mitotic_age_per_doubling", "Intial_Mitotic_Age")]
datapca <- datapca %>% select(Percent_Oxygen, Cell_Line, Days_grown, paste(pca_vars))
pca.obj <- prcomp(datapca[,4:ncol(datapca)], center=TRUE, scale.=TRUE)
P <- ggbiplot(pca.obj,
         obs.scale = 1, 
         var.scale=1,
         ellipse=T,
         circle=F,
         varname.size=3,
         var.axes=T,
         groups=datapca$Percent_Oxygen, #no need for coloring, I'm making the points invisible
         alpha=0) #invisible points, I add them below
P$layers <- c(geom_point(aes(color=datapca$Percent_Oxygen), cex=3), P$layers) #add geom_point in a layer underneath (only way I have to change the size of the points in ggbiplot)

P

5.8 Corrplots

5.8.1 Option 1

## Corrplot side by side
par(mfrow = c(1, 2))
M <- cor(numdata_normoxia, method = "pearson", use = "complete.obs")
corrplot(M, method = "circle", tl.col = "black", tl.cex = 0.3, title = "21% Oxygen", 
    mar = c(0, 0, 4, 0))

P <- cor(numdata_hypoxia, method = "pearson", use = "complete.obs")
corrplot(P, method = "circle", tl.col = "black", tl.cex = 0.3, title = "3% Oxygen", 
    mar = c(0, 0, 4, 0))

5.8.2 Option 2

  • Normoxia data
## Similar heatmap with pvalues Compute correlation coefficients
test <- as.matrix(numdata_normoxia)
cor.coef <- cor(test)

# Compute correlation p-values
cor.test.p <- function(x) {
    FUN <- function(x, y) cor.test(x, y)[["p.value"]]
    z <- outer(colnames(x), colnames(x), Vectorize(function(i, j) FUN(x[, i], x[, 
        j])))
    dimnames(z) <- list(colnames(x), colnames(x))
    z
}
p <- cor.test.p(test)
# Create the heatmap
heatmaply_cor(cor.coef, k_col = 2, k_row = 2, node_type = "scatter", point_size_mat = -log10(p), 
    point_size_name = "-log10(p-value)", label_names = c("x", "y", "Correlation"), 
    fontsize_col = 6, fontsize_row = 6, main = "Normoxia")
  • Hypoxia data
## Similar heatmap with pvalues Compute correlation coefficients
test <- as.matrix(numdata_hypoxia)
cor.coef <- cor(test)

# Compute correlation p-values
cor.test.p <- function(x) {
    FUN <- function(x, y) cor.test(x, y)[["p.value"]]
    z <- outer(colnames(x), colnames(x), Vectorize(function(i, j) FUN(x[, i], x[, 
        j])))
    dimnames(z) <- list(colnames(x), colnames(x))
    z
}
p <- cor.test.p(test)
# Create the heatmap
heatmaply_cor(cor.coef, k_col = 2, k_row = 2, node_type = "scatter", point_size_mat = -log10(p), 
    point_size_name = "-log10(p-value)", label_names = c("x", "y", "Correlation"), 
    fontsize_col = 6, fontsize_row = 6, main = "Hypoxia")

5.8.3 Option 3

ax <- list(title = "", zeroline = F, showline = FALSE, showticklabels = TRUE, showgrid = TRUE)


# Compute a correlation matrix
corr_normoxia <- round(cor(numdata_normoxia), 1)

# Compute a matrix of correlation p-values
p.mat_normoxia <- ggcorrplot::cor_pmat(numdata_normoxia)

# Visualize the lower triangle of the correlation matrix Barring the no
# significant coefficient
corr.plot_norm <- ggcorrplot::ggcorrplot(corr_normoxia, hc.order = F, type = "lower", 
    outline.col = "gray", p.mat = p.mat_normoxia, sig.level = 0.05, insig = "blank", 
    tl.cex = 6, legend.title = "Corr with p<0.001") + labs(title = "21% Oxygen") + 
    # theme(axis.text.x = element_text(margin=margin(-2,0,0,0))) +
coord_fixed(ratio = 0.5)
# corr.plot_norm
corr.plot_norm <- ggplotly(corr.plot_norm)
corr.plot_norm <- corr.plot_norm %>% plotly::layout(xaxis = ax, yaxis = ax)
corr.plot_norm
# Compute a correlation matrix
corr_hypoxia <- round(cor(numdata_hypoxia), 1)

# Compute a matrix of correlation p-values
p.mat_hypoxia <- ggcorrplot::cor_pmat(numdata_hypoxia)

# Visualize the lower triangle of the correlation matrix Barring the no
# significant coefficient
corr.plot_hypoxia <- ggcorrplot::ggcorrplot(corr_hypoxia, hc.order = F, type = "lower", 
    outline.col = "gray", p.mat = p.mat_hypoxia, sig.level = 0.05, insig = "blank", 
    tl.cex = 6, legend.title = "Corr with p<0.001") + labs(title = "3% Oxygen") + 
    # theme(axis.text.x = element_text(margin=margin(-2,0,0,0))) +
coord_fixed(ratio = 0.5)
# corr.plot_norm
corr.plot_hypoxia <- ggplotly(corr.plot_hypoxia)
corr.plot_hypoxia <- corr.plot_hypoxia %>% plotly::layout(xaxis = ax, yaxis = ax)
corr.plot_hypoxia
# #corr_normoxia[45:50,40:45] #corr_hypoxia[45:50,40:45] corr_normoxia[-0.5 <
# corr_normoxia & corr_normoxia < 0.5] <- NA corr_hypoxia[-0.5 < corr_hypoxia &
# corr_hypoxia < 0.5] <- NA test <- corr_normoxia - corr_hypoxia
# #test[45:50,40:45] test[-1 < test & test < 1] <- NA test[is.na(test)] <- 0
# heatmaply_cor( test, label_names = c('x', 'y', 'Difference 21% to 3%'),
# fontsize_col = 6, fontsize_row = 6, main='Hypoxia' )
# library(Hmisc) # ++++++++++++++++++++++++++++ # flattenCorrMatrix #
# ++++++++++++++++++++++++++++ # cormat : matrix of the correlation coefficients
# # pmat : matrix of the correlation p-values flattenCorrMatrix <-
# function(cormat, pmat) { ut <- upper.tri(cormat) data.frame( row =
# rownames(cormat)[row(cormat)[ut]], column = rownames(cormat)[col(cormat)[ut]],
# cor =(cormat)[ut], p = pmat[ut] ) } res2<-rcorr(as.matrix(numdata_normoxia),
# type= 'pearson') sig_corr_norm <- flattenCorrMatrix(res2$r, res2$P)
# sig_corr_norm[which(sig_corr_norm$cor < 0.5 & sig_corr_norm$cor > -0.5), ] = NA
# sig_corr_norm[which(sig_corr_norm$p > 0.05), ] = NA sig_corr_norm <-
# na.omit(sig_corr_norm)%>% select(-c(cor, p))%>% as.data.frame() %>%
# rownames_to_column() res2<-rcorr(as.matrix(numdata_hypoxia), type= 'pearson')
# sig_corr_hypo <- flattenCorrMatrix(res2$r, res2$P)
# sig_corr_hypo[which(sig_corr_hypo$cor < 0.5 & sig_corr_hypo$cor > -0.5), ] = NA
# sig_corr_hypo[which(sig_corr_hypo$p > 0.05), ] = NA sig_corr_hypo <-
# na.omit(sig_corr_hypo) %>% select(-c(cor, p)) %>% as.data.frame() %>%
# rownames_to_column() corr_in_hypo <- anti_join(sig_corr_hypo, sig_corr_norm)
# cor_list <- unique(c(corr_in_hypo$row, corr_in_hypo$column ))
# sig_corr_hypo[!sig_corr_hypo[,'rowname'] %in% sig_corr_norm[,'rowname'],]
# #corr_in_normo <- sig_corr_norm[!sig_corr_norm[,'rowname'] %in%
# sig_corr_norm[,'rowname'],] cor_list <- unique(c(corr_in_hypo$row,
# corr_in_hypo$column#, #corr_in_normo$row, # corr_in_normo$column )) norm <-
# data.frame(row = c('mito', NA, NA), column = c('glyc', NA, NA))
# #%>%unite(group, row, column) hyp <- data.frame(row = c('mito', 'a', 'c'),
# column = c('glyc', 'b','d')) #%>%unite(group, row, column) full_join(norm, hyp)
# diffdf::diffdf(norm, hyp) hyp[which(norm$group %in% hyp$group)] anti_join(hyp,
# norm)

5.9 Pairs

  • Here I select only the features that are up or down in hypoxia
  • The aim is to compare distributions, correlations etc
  • Find features that are solely correlated in either condition
data_combined %>%
  select(Percent_Oxygen, paste(Lower_in_hypoxia))%>%#,  paste(Higher_in_hypoxia)) %>% 
  ggpairs(., mapping = ggplot2::aes(color=Percent_Oxygen), 
          lower = list(continuous = wrap("smooth",  alpha = 0.3, size=0.1)),
          upper = list(continuous = wrap("cor", size = 2))) +
  theme_bw() +
  theme(strip.placement = "outside", 
        text = element_text(size = 1),
        strip.text.x = element_text(size = 3),
        strip.text.y = element_text(size = 2))

# p<-   ggpairs(d, mapping = ggplot2::aes(color=Percent_Oxygen), 
#           lower = list(continuous = wrap("smooth", alpha = 0.3, size=0.1)),
#           upper = list(continuous = wrap("cor", size = 2))) +
#   theme(strip.placement = "outside", text = element_text(size = 5))

#ggplotly(p)
# pairs(subset, main = "test",
#       pch = 21, bg = c("red", "blue")[unclass(subset$Percent_Oxygen)])

# library(GGally)
# d <- highlight_key(iris)
# p <- ggpairs(d, aes(colour = Species), columns = 1:5)
# ggplotly(p) %>% 
#   highlight("plotly_selected")
df = read.table("https://raw.githubusercontent.com/clarewest/RFQAmodel/master/data/RFQAmodel_training.txt", 
    header = TRUE, stringsAsFactors = FALSE) %>% filter(Target %in% c("2OBA_B", "3HSB_D")) %>% 
    select(Target, EigenTHREADER, SAINT2, TMScore)

ggplot2::theme_set(ggplot2::theme_bw())  ## (this just globally sets the ggplot2 theme to theme_bw) 
df %>% select(-Target) %>% ggpairs()

df %>% ggpairs(., mapping = ggplot2::aes(colour = Target), lower = list(continuous = wrap("smooth", 
    alpha = 0.3, size = 0.1)))

# data1<- data_combined %>% select(Percent_Oxygen, Days_Grown, Telomere_Length)
# %>% group_by(Percent_Oxygen, Days_Grown)%>% summarise_at(c('Telomere_Length'),
# mean) %>% # summarise(TL = mean(Telomere_Length, na.rm = TRUE)) %>%
# spread(Percent_Oxygen, Telomere_Length) data2 <- data_combined %>%
# select(Percent_Oxygen, Days_Grown, ATPox, ATPglyc, cf_nDNA, cf_mtDNA) %>%
# group_by(Percent_Oxygen, Days_Grown)%>% summarise_at(c('Telomere_Length',
# 'ATPox'), mean) summarise(TL = mean(Telomere_Length, na.rm = TRUE)) %>%
# spread(Percent_Oxygen, TL, ATPox, ATPglyc, cf_nDNA, cf_mtDNA)



# , ATPox, ATPglyc, cf_nDNA, cf_mtDNA)
# df <- data_normoxia %>% select_if(is.numeric) %>% cor() %>% round(digits = 2)
# %>% as.data.frame() %>% filter_all(any_vars(abs(.) > 0.5)) df <- data_normoxia
# %>% select_if(is.numeric) %>% cor() %>% round(digits = 2) %>% as.data.frame()
# %>% select_if(funs(any(abs(.) > 0.5))) temp = cor(mtcars) temp[rowSums(temp >
# 0.4) > 0, colSums(temp > 0.4) > 0] df1 <- data_normoxia %>%
# select_if(is.numeric) %>% cor() %>% round(digits = 2) %>% as.data.frame() %>%
# rownames_to_column %>% gather(colname, value, -rowname) %>% filter(abs(value)
# >= 0.4)
# nvars <- colnames(data_hypoxia) # To make things easy, the variable names are
# changed into x1 to xn newvarnames <- paste( 'x',1:nvars, sep='' ) # Add the new
# variable names to the matrices dimnames( cormat1 ) <- dimnames( cormat2 ) <-
# list( newvarnames, newvarnames ) # Here's the model model <- paste( 'X',
# 1:nvars, ' =~ ', 'x', 1:nvars, ';', sep='' ) # Fit the model fit <- sem( model,
# sample.cov = list( cormat1, cormat2 ), sample.nobs = c( nobs1, nobs2 ),
# group.equal = 'lv.covariances' ) # This is the outcome of the (Chi^2) test
# lavTestLRT( fit )[ 2, 5:7 ] sqrt(M^2) A <- sqrt(M^2)-sqrt(P^2) Heatmap(A)
# corrplot(A, method = 'circle', tl.col = 'black', tl.cex=0.3, title='21-3%
# Oxygen') library(ggplot2) library(plotly) library(reshape) library(Hmisc)
# library(viridis) x <- data_normoxia y <- as.matrix(x) r <- rcorr(y) m1 <-
# reshape::melt(r$r) m2 <- reshape::melt(r$P) p.value <- m2$value p <- ggplot(m1,
# aes(X1, X2, fill = value, label=p.value)) + geom_tile() + labs(x='',y='') +
# scale_fill_viridis() p ggplotly(p) ### ## # library(plotly) # trace1 <- list( #
# type = 'heatmap', # x = nums_vars, # y = nums_vars, # z = M # ) # data <-
# list(trace1) # layout <- list(title = 'Features Correlation Matrix') # p <-
# plot_ly() # p <- add_trace(p, type=trace1$type, x=trace1$x, y=trace1$y,
# z=trace1$z) # p <- layout(p, title=layout$title) # p # Compute correlation
# coefficients cor.coef <- cor(df) # Compute correlation p-values cor.test.p <-
# function(x){ FUN <- function(x, y) cor.test(x, y)[['p.value']] z <- outer(
# colnames(x), colnames(x), Vectorize(function(i,j) FUN(x[,i], x[,j])) )
# dimnames(z) <- list(colnames(x), colnames(x)) z } p <- cor.test.p(test) #
# Create the heatmap heatmaply_cor( cor.coef, node_type = 'scatter',
# point_size_mat = -log10(p), point_size_name = 'p-value', label_names = c('x',
# 'y', 'Correlation') )
# index=sapply(iris, is.numeric) iris_numeric<-iris[index==TRUE] #p.mat <-
# cor.mtest(iris_numeric)$p p.mat <- corrr::correlate(iris_numeric)%>%as.matrix()
# iris_numeric%>% cor() %>% corrplot::corrplot( method = 'color',order =
# 'hclust',p.mat = p.mat, sig.level = 0.01 , pch.col = 'white',col = brewer.pal(n
# = 11, name = 'PuOr')) M <- cor(iris_numeric) corrplot.mixed(M,upper = 'color',
# tl.col = 'black') iris_numeric%>%cor()%>%corrplot::corrplot() library(ggplot2)
# library(plotly) library(reshape) library(Hmisc) library(viridis) x <-
# iris_numeric y <- as.matrix(x) r <- rcorr(y) m1 <- melt(r$r) m2 <- melt(r$P)
# p.value <- m2$value p <- ggplot(m1, aes(X1, X2, fill = value, label=p.value)) +
# geom_tile() + labs(x='',y='')+ scale_fill_viridis() ggplotly(p)

5.10 Radar plots

6 Results summary

7 Other stuff

library("PerformanceAnalytics")
my_data <- mtcars[, c(1, 3, 4, 5, 6, 7)]
chart.Correlation(my_data, histogram = TRUE, pch = 19)

ggpubr::ggscatter(data_combined, x = "Days_Grown", y = "Telomere_Length", color = "Cell_Line", 
    palette = "jco", add = "reg.line") + facet_wrap(~Cell_Line) + ggpubr::stat_cor(label.y = 4.4) + 
    ggpubr::stat_regline_equation(label.y = 4.2)
Option 2: For detailed analysis

Figure 7.1: Option 2: For detailed analysis

# > `geom_smooth()` using formula 'y ~ x'
pairs(t(exprs[1:5, ]), main = "Anderson's Iris Data -- 3 species", pch = 21, bg = c("#1b9e77", 
    "#d95f02", "#7570b3")[unclass(data_combined$Cell_Line)])
Option 1

Figure 7.2: Option 1

library(WVPlots)

PairPlot(data_combined, colnames(data_combined)[18:21], "Anderson's Iris Data -- 3 species", 
    group_var = "Cell_Line")

8 Notes Anna

8.1 The following code is used for collapsable items:

some output

s <- 1 + 1
s
[1] 2

8.2 The following code is used for two figures side by side

r out.width=c(‘50%’, ‘50%’), fig.show=‘hold’

8.3 Text side by side



Since R Markdown use the bootstrap framework under the hood. It is possible to benefit its powerful grid system. Basically, you can consider that your row is divided in 12 subunits of same width. You can then choose to use only a few of this subunits.



Here, I use 3 subunits of size 4 (4x3=12). The last column is used for a plot. You can read more about the grid system here. I got this result showing the following code in my R Markdown document.

8.4 Add tabs

8.4.1 Tab1

8.4.2 Tab2

8.5 Data tables

library(DT)
datatable(mtcars, rownames = FALSE, filter = "top", options = list(pageLength = 5, 
    scrollX = T))

8.6 Highlight text

  • This is my first conclusion
  • This is my second conclusion
  • This is my first conclusion
  • This is my second conclusion

8.6.0.1 AF0951

8.7 Second way

mtcars[1:5, "mpg", drop = FALSE]
                   mpg
Mazda RX4         21.0
Mazda RX4 Wag     21.0
Datsun 710        22.8
Hornet 4 Drive    21.4
Hornet Sportabout 18.7

8.8 Text formatting

R markdown allows to easily format your text. You can add links, write in bold or italic. This is very well explained in the Rstudio cheatsheet.

8.9 Skip a line

text



text

8.10 Interactive graphics

library(plotly)
set.seed(100)
d <- diamonds[sample(nrow(diamonds), 1000), ]
plot_ly(d, x = ~carat, y = ~price, color = ~carat, size = ~carat, text = ~paste("Clarity: ", 
    clarity))

8.11 Feature ratio

  • TBD (take from 2020-10-05)
  • I